This document demonstrates the use of the bRF and LASSO-D3S functions for integrative GRN inference.
Those functions infer the regulatory pathways of Arabidopsis thaliana’s roots in response to nitrate (N) induction from Varala et al., 2018.
They use as inputs the expression profiles of N-responsive genes and TFBS information. Prior TFBS information was built by searching in the promoters of the N-responsive genes the PWM of the N-responsive regulators.
load('rdata/inference_input_N_response_varala.rdata')
genes <- input_data$grouped_genes; length(genes)
## [1] 1426
tfs <- input_data$grouped_regressors; length(tfs)
## [1] 201
counts <- input_data$counts; dim(counts)
## [1] 1426 45
load("rdata/pwm_occurrences_N_response_varala.rdata")
dim(pwm_occurrence)
## [1] 1426 201
mean_expr <- rowMeans(counts)
var_expr <- matrixStats::rowSds(counts)*matrixStats::rowSds(counts)
How does the individual MSE of genes varies with \(\alpha\) for model estimation in GRN inferences?
# ALPHAS <- seq(0,1, by = 0.1)
# lmses <- data.frame(row.names = genes)
#
# for(alpha in ALPHAS){
# set.seed(12131)
# lmses[,as.character(alpha)] <- bRF_inference_MSE(counts, genes, tfs, alpha = alpha, nTrees = 4000,
# pwm_occurrence = pwm_occurrence, nCores = 18)
# }
#
# save(lmses, file = "results/logMSES_RFs_per_genes_other_seed.rdata")
Clustering the genes :
load(file = "results/logMSES_RFs_per_genes_400trees.rdata")
plot(colMeans(exp(lmses)/var_expr))
mse_rf <- (lmses-rowMeans(lmses)) / matrixStats::rowSds(as.matrix(lmses))
ha = HeatmapAnnotation(
alpha = anno_simple(as.numeric(rep(colnames(mse_rf),1))),
annotation_name_side = "left")
heatmap_rf_4000 <- Heatmap(lmses, row_km = 3, cluster_columns = F, show_row_names = F,
name = "scaled logMSE (z-score)", top_annotation = ha,
row_title = "Genes", column_title = "alpha");heatmap_rf_4000
# lmses_lasso <- data.frame(row.names = genes)
#
# for(alpha in ALPHAS){
# set.seed(12131)
# lmses_lasso[,as.character(alpha)] <- LASSO.D3S_inference_MSE(counts, genes, tfs,
# alpha = alpha, N = 50,
# pwm_occurrence = pwm_occurrence, nCores = 15)
# }
# save(lmses_lasso, file = "results/logMSES_Lasso_per_genes_other_seed.rdata")
load(file = "results/logMSES_Lasso_per_genes.rdata")
mse_lasso <- (lmses_lasso-rowMeans(lmses_lasso)) / matrixStats::rowSds(as.matrix(lmses_lasso))
plot(colMeans(exp(lmses_lasso)/var_expr))
heatmap_lasso <- Heatmap(mse_lasso, row_km = 4, cluster_columns = F, show_row_names = F,
name = "scaled logMSE (z-score)", top_annotation = ha,
row_title = "Genes", column_title = "alpha"); heatmap_lasso
average_genes <- function(mse){
new_mse <- mse
new_mse[,"0-0.2"] <- rowMeans(new_mse[c("0", "0.1", "0.2")])
new_mse[,"0.3-0.6"] <- rowMeans(new_mse[c("0.3", "0.4", "0.5", "0.6")])
new_mse[,"0.7-1"] <- rowMeans(new_mse[c("0.7", "0.8", "0.9", "1")])
return(new_mse[c("0-0.2", "0.3-0.6", "0.7-1")])
}
smooth_rf <- average_genes(lmses)
smooth_lasso <- average_genes(lmses_lasso)
get_optimum <- function(gene, method = "rf"){
if(method=="rf")
return(names(which.min(smooth_rf[gene,])))
if(method=="lasso")
return(names(which.min(smooth_lasso[gene,])))
}
gene <- rownames(smooth_rf)[1]
clusters_rf <- sapply(genes, get_optimum, method = "rf")
clusters_lasso <- sapply(genes, get_optimum, method = "lasso")
# sum(clusters_lasso==clusters_lasso_other_seed)
table(clusters_rf); table(clusters_lasso)
## clusters_rf
## 0-0.2 0.3-0.6 0.7-1
## 859 295 272
## clusters_lasso
## 0-0.2 0.3-0.6 0.7-1
## 990 279 157
Heatmap(mse_rf,
row_km = 3, cluster_columns = F, show_row_names = F,
name = "MSE/Var", top_annotation = ha,
row_title = "Genes")+ rowAnnotation(
clusters_rf = clusters_rf,
col=list(clusters_rf= setNames(c("darkorange", "yellow", "green"),
nm = names(table(clusters_rf))) ))
Heatmap(mse_lasso,
row_km = 3, cluster_columns = F, show_row_names = F,
name = "MSE/Var", top_annotation = ha,
row_title = "Genes")+ rowAnnotation(
clusters_lasso = clusters_lasso,
col=list(clusters_lasso= setNames(c("darkorange", "yellow", "green"),
nm = names(table(clusters_lasso))) ))
Nombre de motifs dans le promoteur, niveau d’expression, ontologies, sont-ils des TFs? leur taille?
Quelle est la précision rappel des sous réseaux concernant ces gènes.
load("rdata/pwm_prom_jaspar_dap.rdata")
library(patchwork)
# to comment for new version where mse is already normalized per genes
norm_mse <- exp(as.matrix(cbind(lmses, lmses_lasso)))/var_expr
genes_info <- data.frame(genes = genes,
cluster_lasso = clusters_lasso[genes],
cluster_rf = clusters_rf[genes])
genes_info$is_tf <- genes %in% tfs
genes_info$var <- var_expr
genes_info$expr <- mean_expr
genes_info$min_mse <- matrixStats::rowMins(norm_mse)
genes_info$nb_motifs <- table(pwm_prom$target)[genes]
genes_info%>%
ggplot(aes(x=cluster_rf, y=nb_motifs)) +
geom_violin(fill="darkblue", alpha=0.2)+geom_jitter(width=0.1)+
geom_boxplot(width=0.1, fill = "white")+
ggtitle(("Number of motifs in promoter for RF groups")) +
stat_compare_means() + genes_info%>%
ggplot(aes(x=cluster_lasso, y=nb_motifs)) +
geom_violin(fill="darkblue", alpha=0.2)+geom_jitter(width=0.1)+
geom_boxplot(width=0.1, fill = "white")+
ggtitle(("Number of motifs in promoter for LASSO groups")) +
stat_compare_means()
## Don't know how to automatically pick scale for object of type <table>.
## Defaulting to continuous.
## Don't know how to automatically pick scale for object of type <table>.
## Defaulting to continuous.
genes_info%>%
ggplot(aes(x=cluster_rf, y=min_mse)) +
geom_violin(fill="darkblue", alpha=0.2)+geom_jitter(width=0.1)+
geom_boxplot(width=0.1, fill = "white")+
ggtitle(("Min mse for RF groups")) +
stat_compare_means() + genes_info%>%
ggplot(aes(x=cluster_lasso, y=min_mse)) +
geom_violin(fill="darkblue", alpha=0.2)+geom_jitter(width=0.1)+
geom_boxplot(width=0.1, fill = "white")+
ggtitle(("Min mse for LASSO groups")) +
stat_compare_means()
genes_info%>%
ggplot(aes(x=cluster_rf, y=var)) +
geom_violin(fill="darkblue", alpha=0.2)+geom_jitter(width=0.1)+
geom_boxplot(width=0.1, fill = "white")+
ggtitle(("Gene variance for RF groups")) + scale_y_log10()+
stat_compare_means()+
genes_info%>%
ggplot(aes(x=cluster_lasso, y=var)) +
geom_violin(fill="darkblue", alpha=0.2)+geom_jitter(width=0.1)+
geom_boxplot(width=0.1, fill = "white")+
ggtitle(("Gene variance for LASSO groups")) + scale_y_log10()+
stat_compare_means()
genes_info%>%
ggplot(aes(x=cluster_rf, y=expr)) +
geom_violin(fill="darkblue", alpha=0.2)+geom_jitter(width=0.1)+
geom_boxplot(width=0.1, fill = "white")+
ggtitle(("Gene expression for RF groups")) + scale_y_log10()+
stat_compare_means()+
genes_info%>%
ggplot(aes(x=cluster_lasso, y=expr)) +
geom_violin(fill="darkblue", alpha=0.2)+geom_jitter(width=0.1)+
geom_boxplot(width=0.1, fill = "white")+
ggtitle(("Gene expression for LASSO groups")) + scale_y_log10()+
stat_compare_means()
genes_info %>%
group_by(cluster_rf) %>%
summarise(n=n(),
tf_frac=sum(is_tf)/n()) %>%
ggplot(aes(x=cluster_rf, y=tf_frac,
label = paste("n=",n))) +
geom_bar(stat = "identity", aes(fill=tf_frac), alpha = 1)+
geom_hline(yintercept = length(tfs)/length(genes)) +
geom_text(y=0.2) + xlab("cluster RF") +
ggtitle("Fraction of TFs in RF groups")+ylim(c(0,0.2))+
genes_info %>%
group_by(cluster_lasso) %>%
summarise(n=n(),
tf_frac=sum(is_tf)/n()) %>%
ggplot(aes(x=cluster_lasso, y=tf_frac,
label = paste("n=",n))) +
geom_bar(stat = "identity", aes(fill=tf_frac), alpha = 1)+
geom_hline(yintercept = length(tfs)/length(genes)) +
geom_text(y=0.2) + xlab("cluster LASSO") +ylim(c(0,0.2))+
ggtitle("Fraction of TFs in LASSO groups")
# Heatmap((as.matrix(targets_per_pwm)-rowMeans(as.matrix(targets_per_pwm)))/matrixStats::rowSds(as.matrix(targets_per_pwm)))
#
# Heatmap((as.matrix(targets_per_pwm)/rowMeans(as.matrix(targets_per_pwm))))
#
# as.matrix(targets_per_pwm)/rowMeans(as.matrix(targets_per_pwm))==matrixStats::rowMaxs(as.matrix(targets_per_pwm)/rowMeans(as.matrix(targets_per_pwm)))
common_pwm_pos <- intersect(names(which(clusters_lasso == "0.7-1")), names(which(clusters_rf == "0.7-1")))
DIANE::get_gene_information(common_pwm_pos, organism = "Arabidopsis thaliana")
length(intersect(common_pwm_pos, tfs))/length(common_pwm_pos)
## [1] 0.2075472
Est-ce qu’on a significativement plus de TFs dans les genes qui sont bien prédits avec les PWM chez le lasso?
fisher.test(matrix(c(length(genes), length(tfs), 157, 157*0.1910828),
nrow = 2), alternative = "greater")
##
## Fisher's Exact Test for Count Data
##
## data: matrix(c(length(genes), length(tfs), 157, 157 * 0.1910828), nrow = 2)
## p-value = 0.09628
## alternative hypothesis: true odds ratio is greater than 1
## 95 percent confidence interval:
## 0.9255823 Inf
## sample estimates:
## odds ratio
## 1.355392
# promoteurs enrichis en certains motifs de TFs?
known_tfs <- tfs[which(tfs %in% pwm_prom$TF)]
get_number_of_motifs_per_tfs <- function(genes){
table(pwm_prom[pwm_prom$target %in% genes & pwm_prom$TF %in% tfs,"TF"])[known_tfs]
}
targets_per_pwm <- data.frame(row.names = known_tfs)
for(group in unique(clusters_lasso)){
targets_per_pwm[paste("lasso", group)] <- get_number_of_motifs_per_tfs(names(
which(clusters_lasso == group)))/sum(clusters_lasso == group)
targets_per_pwm[paste("rf", group)] <- get_number_of_motifs_per_tfs(names(
which(clusters_rf == group)))/sum(clusters_rf == group)
}
enrichments_per_pwm <- data.frame(row.names = known_tfs)
n_genes <- length(genes)
for(group in unique(clusters_lasso)){
# number of motifs in all the genes
n_targets_lasso_in_all <- get_number_of_motifs_per_tfs(genes)
n_targets_lasso_in_group <- get_number_of_motifs_per_tfs(names(
which(clusters_lasso == group)))
n_group_lasso <- length(names(which(clusters_lasso == group)))
n_targets_rf_in_group <- get_number_of_motifs_per_tfs(names(
which(clusters_rf == group)))
n_group_rf <- length(names(which(clusters_rf == group)))
for(tf in known_tfs){
# number of genes with that motif in all genes
n_targets_in_all_tf <- n_targets_lasso_in_all[tf]
# number of genes with that motif in the lasso group
n_targets_lasso_in_group_tf <- n_targets_lasso_in_group[tf]
p_lasso <- phyper(q=n_targets_lasso_in_group_tf-1,
m=n_targets_in_all_tf, #white balls
n=n_genes-n_targets_in_all_tf, # black balls
k=n_group_lasso, lower.tail = FALSE)
# number of genes with that motif in the rf group
n_targets_rf_in_group_tf <- n_targets_rf_in_group[tf]
p_rf <- phyper(q=n_targets_rf_in_group_tf-1,
m=n_targets_in_all_tf,
n=n_genes-n_targets_in_all_tf,
k=n_group_rf, lower.tail = FALSE)
enrichments_per_pwm[tf, paste0(group, "lasso")]<- p_lasso
enrichments_per_pwm[tf, paste0(group, "rf")]<- p_rf
}
}
enrichments_per_pwm[enrichments_per_pwm<0.05] <- 0
enrichments_per_pwm[enrichments_per_pwm>=0.05] <- 1
Heatmap(enrichments_per_pwm)
tfs_rf_pwm_pos <- rownames(enrichments_per_pwm[enrichments_per_pwm$`0.7-1rf`==0,])
tfs_lasso_pwm_pos <- rownames(enrichments_per_pwm[enrichments_per_pwm$`0.7-1lasso`==0,])
DIANE::get_gene_information(tfs_rf_pwm_pos, organism = "Arabidopsis thaliana")
DIANE::get_gene_information(tfs_lasso_pwm_pos, organism = "Arabidopsis thaliana")
DIANE::get_gene_information(intersect(tfs_rf_pwm_pos, tfs_lasso_pwm_pos ), organism = "Arabidopsis thaliana")
tfs_rf_pwm_bad <- rownames(enrichments_per_pwm[enrichments_per_pwm$`0-0.2rf`==0,])
tfs_lasso_pwm_bad <- rownames(enrichments_per_pwm[enrichments_per_pwm$`0-0.2lasso`==0,])
DIANE::get_gene_information(tfs_rf_pwm_bad, organism = "Arabidopsis thaliana")
DIANE::get_gene_information(tfs_lasso_pwm_bad, organism = "Arabidopsis thaliana")
bZIP3 et TGA1 sont dans les motifs qui permettent de réduire l’erreur des cibles pour les deux méthodes. AT3G11280 est dans gaudinier. On n’a que des BZIPs. Plus spécifiquement aux méthodes, on a aussi des HHO qui sortent, DIV1, VRN1.
Ces TFs ont le point commun de se fixer dans les G-box : https://academic.oup.com/plphys/article/175/2/628/6116702 “Many TF family members bind to similar or identical sequence motifs, such as G-boxes (CACGTG), so it is difficult to predict regulatory relationships. We determined that the flanking sequences near G-boxes help determine in vitro specificity but that this is insufficient to predict the transcription pattern of genes near G-boxes. Therefore, we constructed a gene regulatory network that identifies the set of bZIPs and bHLHs that are most predictive of the expression of genes downstream of perfect G-boxes. This network accurately predicts transcriptional patterns and reconstructs known regulatory subnetworks.”
“Since these families are much larger in plants than in the other organisms studied, it is unclear whether these mechanisms are sufficient to explain TF-to-binding site interactions in Arabidopsis. This has been a hindrance to the plant research community, as there have been many cases where a G-box has been identified as critical for the regulation of a gene of interest but researchers were unable to distinguish between the many possible candidate TFs that might regulate that gene”
Les gènes pour lesquels la MSE diminue en utilisant l’information des PWMs ont des enrichissements en motifs pour certains TFs. Ces TFs (et plus précisément ceux dont les motifs sont enrichis dans les groupes bénéficiant de l’intégration pour les deux méthodes) sont annotés comme étant des master regulators de la réponse au nitrate. Cela veut dire qu’on a effectivement constitué des groupes de gènes pour lesquels l’expression dans la réponse à l’induction nitrate peut être mieux prédite avec l’information de la séquence, et qu’ils sont probablement les cibles directes de gènes clés de la réponse au nitrate.
Est-ce que les TFs dont les motifs sont enrichis sont effectivement sélectionnés comme régulateurs dans les réseaux inférés pour les sous groups de gènes pertinents pour l’intégration?
load("results/networks_bRF_lasso.rdata")
get_degree <- function(group, method, TFS){
if(method == "lasso")
genes <- names(which(clusters_lasso == group))
if(method == "rf")
genes <- names(which(clusters_rf == group))
subset <- lapply(edges, function(net){net[net$to %in% genes,]})
net <- subset$bRF_1_0.05_1
table(net$from)["AT3G18990"]
hist(table(net$from), breaks = 100)
max(table(net$to))
}
Oui par exemple VNR1, le plus connecté pour le groupe des RFs, est trouvé dans les motifs enrichis de ce groupe. Ce qui est normal, mais une bonne confirmation quand même.
load("results/networks_bRF_lasso.rdata")
edges <- edges[str_detect(names(edges), "0.075|0.05|0.01")]
color_palette = c( "#EC5D2F","#FE945C", "#FFC08E" )
settings <- c("method", "alpha", "density", "rep")
prettyZero <- function(l){
max.decimals = max(nchar(str_extract(l, "\\.[0-9]+")), na.rm = T)-1
lnew = formatC(l, replace.zero = T, zero.print = "0",
digits = max.decimals, format = "f", preserve.width=T)
return(lnew)}
get_precision <- function(group, method, validation = c("TARGET", "CHIPSeq", "DAPSeq")){
if(method == "lasso")
genes <- names(which(clusters_lasso == group))
if(method == "rf")
genes <- names(which(clusters_rf == group))
subset <- lapply(edges, function(net){net[net$to %in% genes,]})
val_conn <-
evaluate_networks(
subset,
validation = validation,
nCores = 10,
input_genes = genes,
input_tfs = tfs
)
val_conn[, settings] <-
str_split_fixed(val_conn$network_name, '_', length(settings))
val_conn$group <- group
val_conn$method_mse <- method
if(method == "rf")
val_conn<-val_conn[val_conn$method != "LASSO-D3S",]
if(method == "lasso")
val_conn<-val_conn[val_conn$method == "LASSO-D3S",]
return(val_conn)
}
draw_precision <- function(validation){
precision <- data.frame()
for(group in unique(clusters_lasso)){
for(method in c("rf", "lasso")){
if(nrow(precision)==0)
precision<- get_precision(group, method, validation = validation)
else
precision <- rbind.data.frame(precision, get_precision(group, method, validation = validation))
}
}
(precision %>%
dplyr::select(-network_name) %>%
mutate(alpha = as.numeric(alpha)) %>%
ggplot(aes(color = density, x = alpha, y = precision)) +
ggh4x::facet_nested_wrap(vars(method, group), ncol = 8, nest_line = T) + geom_point() +
geom_smooth(aes(fill = density), alpha = 0.1) +
theme(
strip.background = element_blank(),
axis.title.x = element_text(size = 22),
title = element_text(size = 16),
strip.text = element_text(size = 16),
legend.text = element_text(size = 15),
axis.text = element_text(size = 12),
legend.position = 'top'
) +
xlab(expression(alpha)) + ylab("Precision") +
ggtitle("Precision against ConnecTF") +
scale_x_continuous(labels = prettyZero) +
scale_color_manual(name = "Network density", values = color_palette) +
scale_fill_manual(name = "Network density", values = color_palette))/(
precision %>%
dplyr::select(-network_name) %>%
mutate(alpha = as.numeric(alpha)) %>%
ggplot(aes(color = density, x = alpha, y = recall)) +
ggh4x::facet_nested_wrap(vars(method, group), ncol = 8, nest_line = T) + geom_point() +
geom_smooth(aes(fill = density), alpha = 0.1) +
theme(
strip.background = element_blank(),
axis.title.x = element_text(size = 22),
title = element_text(size = 16),
strip.text = element_text(size = 16),
legend.text = element_text(size = 15),
axis.text = element_text(size = 12),
legend.position = 'none'
) +
xlab(expression(alpha)) + ylab("Recall") +
ggtitle("Recall against ConnecTF") +
scale_x_continuous(labels = prettyZero) +
scale_color_manual(name = "Network density", values = color_palette) +
scale_fill_manual(name = "Network density", values = color_palette))
}
draw_precision(validation = "TARGET")
##
## Parallel network validation with AranetBench Using 10 cores.
## 2.236 sec elapsed
##
## Parallel network validation with AranetBench Using 10 cores.
## 1.776 sec elapsed
##
## Parallel network validation with AranetBench Using 10 cores.
## 4.229 sec elapsed
##
## Parallel network validation with AranetBench Using 10 cores.
## 4.841 sec elapsed
##
## Parallel network validation with AranetBench Using 10 cores.
## 2.359 sec elapsed
##
## Parallel network validation with AranetBench Using 10 cores.
## 2.319 sec elapsed
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
draw_precision(validation = c( "CHIPSeq"))
##
## Parallel network validation with AranetBench Using 10 cores.
## 2.169 sec elapsed
##
## Parallel network validation with AranetBench Using 10 cores.
## 1.608 sec elapsed
##
## Parallel network validation with AranetBench Using 10 cores.
## 5.199 sec elapsed
##
## Parallel network validation with AranetBench Using 10 cores.
## 5.993 sec elapsed
##
## Parallel network validation with AranetBench Using 10 cores.
## 2.433 sec elapsed
##
## Parallel network validation with AranetBench Using 10 cores.
## 2.3 sec elapsed
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
draw_precision(validation = c( "DAPSeq"))
##
## Parallel network validation with AranetBench Using 10 cores.
## 4.272 sec elapsed
##
## Parallel network validation with AranetBench Using 10 cores.
## 3.526 sec elapsed
##
## Parallel network validation with AranetBench Using 10 cores.
## 7.154 sec elapsed
##
## Parallel network validation with AranetBench Using 10 cores.
## 8.081 sec elapsed
##
## Parallel network validation with AranetBench Using 10 cores.
## 4.307 sec elapsed
##
## Parallel network validation with AranetBench Using 10 cores.
## 4.132 sec elapsed
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# draw_precision(validation = c("TARGET", "CHIPSeq"))
draw_precision(validation = c("TARGET", "CHIPSeq", "DAPSeq"))
##
## Parallel network validation with AranetBench Using 10 cores.
## 6.059 sec elapsed
##
## Parallel network validation with AranetBench Using 10 cores.
## 5.439 sec elapsed
##
## Parallel network validation with AranetBench Using 10 cores.
## 9.662 sec elapsed
##
## Parallel network validation with AranetBench Using 10 cores.
## 10.611 sec elapsed
##
## Parallel network validation with AranetBench Using 10 cores.
## 6.229 sec elapsed
##
## Parallel network validation with AranetBench Using 10 cores.
## 6.011 sec elapsed
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
library(DIANE)
##
## Attaching package: 'DIANE'
## The following object is masked from 'package:igraph':
##
## normalize
background <- convert_from_agi(genes)
##
for(group in unique(clusters_lasso)){
genes_i <- names(which(clusters_lasso == group))
print(paste("LASSO", length(genes_i), "genes,", group, "\n"))
genes_i <- convert_from_agi(genes_i)
go_lasso <- enrich_go(genes_i, background)
DIANE::draw_enrich_go(go_lasso)
go_lasso
genes_i <- names(which(clusters_rf == group))
print(paste("RF", length(genes_i), "genes, min mse at", group))
genes_i <- convert_from_agi(genes_i)
go_rf <- enrich_go(genes_i, background)
DIANE::draw_enrich_go(go_rf)
go_rf
# common_go <- intersect(go_lasso$ID, go_rf$ID)
# print(go_lasso[common_go,])
}
## [1] "LASSO 157 genes, 0.7-1 \n"
##
## [1] "RF 272 genes, min mse at 0.7-1"
## [1] "LASSO 990 genes, 0-0.2 \n"
## [1] "RF 859 genes, min mse at 0-0.2"
## [1] "LASSO 279 genes, 0.3-0.6 \n"
## [1] "RF 295 genes, min mse at 0.3-0.6"
# go_common <- enrich_go(convert_from_agi(common_pwm_pos), background)
# draw_enrich_go(go_common)
go_all <- enrich_go(convert_from_agi(genes), convert_from_agi(rownames(gene_annotations$`Arabidopsis thaliana`)))
draw_enrich_go(go_all)
Rien de spécial n’est trouvé en lien avec les ontologies enrichies dans certains groupes, la liste de base est enrichie en ribosomal functions, et ça se retrouve dans des sous groupes comparés à l’ensemble du génome. comparés à la liste de base, les sous groups ne sont quasiment pas enrichis…
expression <- data.frame(counts)
expression <- (expression-rowMeans(expression)) / matrixStats::rowSds(as.matrix(expression))
Heatmap(expression, cluster_columns = F, show_row_names = F)+ rowAnnotation(
clusters_lasso = clusters_lasso, clusters_rf = clusters_rf,
col=list(clusters_lasso= setNames(c("darkorange", "yellow", "green"),
nm = names(table(clusters_lasso))),
clusters_rf= setNames(c("darkorange", "yellow", "green"),
nm = names(table(clusters_rf)))))
expression$genes <- rownames(expression)
reshape2::melt(expression) %>%
mutate(time = extract_numeric(str_split_fixed(variable, '_', 2)[,1]),
rep = str_split_fixed(variable, '_', 2)[,2],
trt = substr(variable,1,1),
group_lasso = clusters_lasso[genes],
group_rf = clusters_rf[genes]) %>%
filter(trt!="T")%>%
ggplot(aes(x=time, y=value, col = group_lasso, group =genes))+
geom_line(alpha = 0.1) +
facet_wrap(group_lasso~ trt, nrow = 1)
## Using genes as id variables
## extract_numeric() is deprecated: please use readr::parse_number() instead
reshape2::melt(expression) %>%
mutate(time = extract_numeric(str_split_fixed(variable, '_', 2)[,1]),
rep = str_split_fixed(variable, '_', 2)[,2],
trt = substr(variable,1,1),
group_lasso = clusters_lasso[genes],
group_rf = clusters_rf[genes]) %>%
filter(trt!="T")%>%
ggplot(aes(x=time, y=value, col = group_rf, group =genes))+
geom_line(alpha = 0.1) +
facet_wrap(group_rf~ trt, nrow = 1)
## Using genes as id variables
## extract_numeric() is deprecated: please use readr::parse_number() instead